package com.kdtech.analyse.Bbs;
import com.kdtech.crawler.at.UrlArgumentTop;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.DoMainUtils;
import com.kdtech.utils.NumberUtils;

import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;




import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.analyse.AnalyseNews;
import com.kdtech.utils.DoMainUtils;
import com.kdtech.utils.RegexUtils;
import com.kdtech.utils.StringUtils;

/**
 * Parser for the C2000 salon BBS.
 * Home page: http://www.c2000.cn/index.htm
 * @author dhy
 *
 */
public class C2000BbsAnalyse implements AnalyseNews{

	/**
	 * Patterns for thread detail pages. Dots in the host and script name are
	 * escaped so that they only match a literal '.'.
	 */
	private static final String[] DETAIL_PAGE_REGEXES = {
			"http://www\\.c2000\\.cn/dispbbs\\.asp[?]boardID=[0-9]*&ID=[0-9]*&page=[0-9]*",
			"http://www\\.c2000\\.cn/dispbbs\\.asp\\?boardID=[0-9]*&id=[0-9]*.*",
			"http://www\\.c2000\\.cn/dispbbs\\.asp\\?boardID=[0-9]*&ID=[0-9]*",
	};

	/** Matches one dvbbs_topic_list(...) call and captures its argument list. */
	private static final Pattern TOPIC_LIST_CALL = Pattern.compile("dvbbs_topic_list\\(([^)]*)\\)");

	/** Separator between the quoted arguments of the dvbbs_* script calls. */
	private static final String FIELD_SEPARATOR = "','";

	/** Number of leading dvbbs_show_topic arguments dropped before the content. */
	private static final int CONTENT_LEADING_FIELDS = 11;

	/** Number of trailing dvbbs_show_topic arguments dropped after the content. */
	private static final int CONTENT_TRAILING_FIELDS = 17;

	/** Number of leading dvbbs_show_topic arguments dropped before the author. */
	private static final int AUTHOR_LEADING_FIELDS = 2;

	/**
	 * Tells whether the given URL is a thread detail page of this forum.
	 *
	 * @param url the URL to test
	 * @return the result of matching the URL against the detail-page patterns,
	 *         ignoring case
	 */
	public boolean isDetailPage(String url) {
		return RegexUtils.matchAnyIgnoreCase(url, DETAIL_PAGE_REGEXES);
	}

	/**
	 * Extracts thread links from the HTML of a board list page. Every
	 * dvbbs_topic_list(...) call found in the page yields one detail-page URL
	 * built from the thread id and board id found in its arguments.
	 *
	 * @param html the list page HTML; may be null
	 * @return a map whose keys are the detail-page URLs (values are empty
	 *         strings); empty when html is null or nothing matches
	 */
	public static HashMap<String, String> getLinks(String html) {
		HashMap<String, String> links = new HashMap<String, String>();
		if (html == null) {
			return links;
		}
		try {
			Matcher matcher = TOPIC_LIST_CALL.matcher(html);
			while (matcher.find()) {
				String arguments = matcher.group(1);
				String id = StringUtils.substringBetween(arguments, "TempStr,'", "'");
				String bid = StringUtils.substringBetween(arguments, "','", "'");
				if (id != null && bid != null) {
					String url = String.format("http://www.c2000.cn/dispbbs.asp?boardID=%s&ID=%s", bid, id);
					links.put(url, "");
				}
			}
		} catch (Exception e) {
			// Best effort: keep whatever links were collected before the failure.
			e.printStackTrace();
		}
		return links;
	}

	/**
	 * Parses a fetched thread page into a NewsMeta. Title, content, date and
	 * author are read from the page; the update URL is the page URL itself.
	 *
	 * @param urlMeta the fetched page (URL plus HTML)
	 * @return the parsed post, or null when urlMeta is null or carries no HTML
	 *         (for example after a failed fetch)
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		// Without HTML there is nothing to parse; handing null to Jsoup would throw.
		if (urlMeta == null || urlMeta.getHtml() == null) {
			return null;
		}
		String htmltxt = urlMeta.getHtml();
		String url = urlMeta.getUrl();

		NewsMeta bbs = new NewsMeta();
		bbs.setUrl(url);

		Document doc = Jsoup.parse(htmltxt);
		String title = doc.select("th[width=73%]").text().replace("* 贴子主题：", "");

		// The post body and author are arguments of the dvbbs_show_topic(...) script call.
		String mainHtmltxt = StringUtils.substringAfter(htmltxt, "TempStr='");
		String topicArgs = StringUtils.substringBetween(mainHtmltxt, "dvbbs_show_topic", "));");

		String content = skipLeadingFields(topicArgs, CONTENT_LEADING_FIELDS);
		for (int i = 0; i < CONTENT_TRAILING_FIELDS && content != null; i++) {
			content = StringUtils.substringBeforeLast(content, FIELD_SEPARATOR);
		}
		if (content != null) {
			content = content.replace(FIELD_SEPARATOR, "");
		}

		Long date = DateUtils.matchDate(StringUtils.substringBetween(htmltxt, "TempStr", "</script>"));

		String author = skipLeadingFields(topicArgs, AUTHOR_LEADING_FIELDS);
		if (author != null) {
			author = StringUtils.substringBefore(author, FIELD_SEPARATOR);
		}

		// Click and comment counts are not extracted by this parser; null is
		// passed through so NumberUtils.parseInt decides the stored value.
		String clickNum = null;
		String commentNum = null;

		bbs.setTitle(title);
		bbs.setContent(content);
		bbs.setDate(date);
		// The address used for later updates is the page URL itself.
		bbs.setUpdateUrl(url);
		bbs.setClickNum(NumberUtils.parseInt(clickNum));
		bbs.setCommentNum(NumberUtils.parseInt(commentNum));
		bbs.setAuthor(author);
		return bbs;
	}

	/**
	 * Drops the given number of leading separator-delimited fields from text.
	 *
	 * @param text the argument list; may be null
	 * @param count how many leading fields to drop
	 * @return the remainder after dropping the fields, or null when text is
	 *         null or becomes null along the way
	 */
	private static String skipLeadingFields(String text, int count) {
		String rest = text;
		for (int i = 0; i < count && rest != null; i++) {
			rest = StringUtils.substringAfter(rest, FIELD_SEPARATOR);
		}
		return rest;
	}

	/**
	 * Re-fetches and re-parses a previously parsed post through its update URL.
	 *
	 * @param meta the previously parsed post; may be null
	 * @return the freshly parsed post, or null when meta is null, its update
	 *         URL is blank, or the page could not be parsed
	 */
	public NewsMeta Update(NewsMeta meta) {
		if (meta == null) {
			return null;
		}
		String updateUrl = meta.getUpdateUrl();
		if (!StringUtils.isNotBlank(updateUrl)) {
			return null;
		}
		UrlMeta urlMeta = CrawlHTML.responseToURL(updateUrl);
		return parserHtml(urlMeta);
	}

	/**
	 * Manual smoke test: fetches one sample thread and prints the parsed result.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		C2000BbsAnalyse a = new C2000BbsAnalyse();
		String url = "http://www.c2000.cn/dispbbs.asp?boardID=34&ID=2723792";
		System.out.println(DoMainUtils.GetDomainName(url));
		System.out.println(url);
		if (!a.isDetailPage(url)) {
			System.out.println("不匹配规则");
		} else {
			UrlMeta meta = CrawlHTML.responseToURL(url);
			NewsMeta parserHtml = a.parserHtml(meta);
			System.out.println(parserHtml);
		}
	}


}
